import os
import json
import random
import shutil
from PIL import Image
from pycocotools.coco import COCO

# Dataset locations
data_dir = r'G:\coco\train2017'  # replace with the path to your COCO image folder
annotation_file = r'G:\coco\annotations\instances_train2017.json'  # replace with the path to your annotation file

# Fix the seed of the random module before sampling
random.seed(42)

# Load the annotation file through the COCO API
coco = COCO(annotation_file)

# IDs of every image known to the annotation file
all_image_ids = coco.getImgIds()

# Number of images in a 10% subset (truncated to an integer)
num_samples = int(0.1 * len(all_image_ids))

# Randomly pick the image IDs that make up the subset
subset_image_ids = random.sample(all_image_ids, k=num_samples)

# Folder that receives the sampled images and the annotation file
save_folder = r'D:\Desktop\n\train2017'  # replace with the folder to save images and annotations to
os.makedirs(save_folder, exist_ok=True)

# Accumulator for the image records and annotations of the subset
annotations_dict = dict(images=[], annotations=[])

# Copy every sampled image into <save_folder>/images and collect its image
# record and annotations in annotations_dict.
images_dir = os.path.join(save_folder, 'images')
# Create the destination folder once instead of on every loop iteration.
os.makedirs(images_dir, exist_ok=True)

for i, image_id in enumerate(subset_image_ids):
    # Reset per iteration so the error message below never hits an unbound
    # name or reports the path of a previously processed image.
    image_path = None
    try:
        # Look up the source image record and its location on disk.
        image_info = coco.loadImgs(image_id)[0]
        image_path = os.path.join(data_dir, image_info['file_name'])

        # Skip samples whose image file is missing.
        if not os.path.exists(image_path):
            print(f"Image file not found for image ID {image_id}. Ignoring this sample.")
            continue

        # Decode the image as an integrity check: a file that cannot be
        # decoded raises here and the sample is skipped by the handler below.
        # The decoded pixels are not needed, and the context manager releases
        # the file handle.
        with Image.open(image_path) as image:
            image.convert("RGB")

        # Fetch the annotations attached to this image.
        annotation_ids = coco.getAnnIds(imgIds=image_id)
        annotations_info = coco.loadAnns(annotation_ids)

        # New file name: text before the first dot of the original name plus
        # the zero-padded sample index.
        base_name = f'{image_info["file_name"].split(".")[0]}_{i:06d}'
        image_filename = f'{base_name}.jpg'

        # Copy the image file unchanged into the images folder.
        shutil.copyfile(image_path, os.path.join(images_dir, image_filename))

        # Image record for the subset. The relative path uses a forward slash
        # so the JSON is readable on every platform (os.path.join would write
        # a backslash on Windows).
        image_record = {
            'id': image_id,
            'file_name': f'images/{image_filename}',
        }
        # Carry over the image dimensions when the source record has them.
        for key in ('width', 'height'):
            if key in image_info:
                image_record[key] = image_info[key]
        annotations_dict["images"].append(image_record)

        # Store shallow copies so the dicts owned by the COCO index are not
        # modified while the image_id is pinned to the sampled image.
        for annotation_info in annotations_info:
            annotations_dict["annotations"].append(
                dict(annotation_info, image_id=image_id)
            )
    except Exception as e:
        # Best-effort export: report the failing sample and keep going.
        print(f"Error processing sample at image ID {image_id} ({image_path}): {e}")

# Build the path of the combined annotation file.
annotation_save_file = os.path.join(save_folder, 'annotations.json')

# In the COCO format, annotations refer to their category by id, so the subset
# file needs the category table of the source dataset for those ids to be
# resolvable. setdefault leaves an already present "categories" entry alone;
# a source file without categories yields an empty list.
annotations_dict.setdefault("categories", coco.dataset.get("categories", []))

# Write all collected image records and annotations to a single JSON file.
try:
    with open(annotation_save_file, 'w', encoding='utf-8') as f:
        json.dump(annotations_dict, f)
    print("下采样完成，图像和标注文件保存在:", save_folder)
except PermissionError:
    # Only permission problems are reported here; other errors propagate.
    print(f"PermissionError: 无法保存标注文件到 {annotation_save_file}，请检查文件夹权限。")